# Timing Matters: Climate Policies and Adaptive Resilience
# Main Models and Variable Selection
# D'Amico & Maboudi (2025)

# ============================================================================
# SETUP
# ============================================================================

rm(list = ls())

library(dplyr)
library(ggplot2)
library(lmtest)
library(parallel)
library(plm)
library(purrr)
library(randomForest)
library(readr)
library(sandwich)
library(doParallel)
library(foreach)

# Load cleaned data
pdata <- read_csv("pdata_clean.csv")

# Create interaction terms
pdata <- pdata %>%
  mutate(
    adaptation_plans_stock_max_populism = adaptation_plans_stock * max_populism,
    adaptation_plans_stock_adaptationaid_recipient = adaptation_plans_stock * adaptationaid_recipient,
    adaptation_plans_stock_envexp_pgdp = adaptation_plans_stock * envexp_pgdp
  )

# ============================================================================
# DATA SEGMENTATION (THREE PERIODS)
# ============================================================================

pdata_seg0 <- subset(pdata, year < 2006)
pdata_seg3 <- subset(pdata, year > 2006 & year <= 2015)
pdata_seg4 <- subset(pdata, year > 2015)

# Convert to panel data frames
pdata <- pdata.frame(pdata, index = c("iso3c", "year"))
pdata_seg0 <- pdata.frame(pdata_seg0, index = c("iso3c", "year"))
pdata_seg3 <- pdata.frame(pdata_seg3, index = c("iso3c", "year"))
pdata_seg4 <- pdata.frame(pdata_seg4, index = c("iso3c", "year"))

# ============================================================================
# RANDOM FOREST VARIABLE SELECTION
# ============================================================================

# Set up parallel processing
cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)

cat("Running Random Forest variable importance analysis...\n")

# Full predictor set
predictors_full <- c(
  "adaptation_plans_stock", "ROL_perc", "ln_gdp", 
  "percent_gdp_services", "imports_pGDP", "auton",
  "max_populism", "gdpgrowth_per", "global_north", 
  "disaster_count", "ifparis_bin", "kyoto_sig", "kyoto_rat",
  "paris_sig", "paris_rat", "ndc_sub", "if_adaptation",
  "financial_recession", "adaptationaid_recipient",
  "envexp_pgdp", "ln_gdp_square", 
  "adaptation_plans_stock_max_populism",
  "adaptation_plans_stock_adaptationaid_recipient", 
  "adaptation_plans_stock_envexp_pgdp"
)

# Segments for RF analysis
segments_data <- list(
  "All Years" = pdata,
  "< 2006" = pdata_seg0,
  "2007–2015" = pdata_seg3,
  "2016+" = pdata_seg4
)

# RF function
run_rf_optimized <- function(data, dep_var, predictors, segment_name = "") {
  set.seed(1995)
  
  data$year <- as.factor(data$year)
  rf_data <- data %>%
    select(all_of(c(dep_var, predictors))) %>%
    na.omit()
  
  if (nrow(rf_data) < 20) {
    warning(paste("Too few observations for", segment_name))
    return(NULL)
  }
  
  rf_model <- randomForest(
    x = rf_data[, predictors],
    y = rf_data[[dep_var]],
    importance = TRUE,
    ntree = 100,
    nodesize = 5,
    mtry = max(1, floor(sqrt(length(predictors))))
  )
  
  # Extract variable importance
  imp <- importance(rf_model)
  imp_df <- data.frame(
    Variable = rownames(imp),
    `%IncMSE` = imp[, "%IncMSE"]
  ) %>%
    arrange(desc(`%IncMSE`)) %>%
    mutate(
      cumulative_percent = cumsum(`%IncMSE`) / sum(`%IncMSE`) * 100
    )
  
  # Variables explaining 90% of variance
  top_90_vars <- imp_df %>%
    filter(cumulative_percent <= 90) %>%
    pull(Variable)
  
  if (length(top_90_vars) == 0) {
    top_90_vars <- imp_df$Variable[1]
  }
  
  return(list(
    top_90_vars = top_90_vars,
    importance_data = imp_df,
    segment = segment_name,
    n_vars_90 = length(top_90_vars),
    model = rf_model
  ))
}

# Run RF in parallel
rf_results <- foreach(
  seg_name = names(segments_data),
  .packages = c("randomForest", "dplyr"),
  .combine = list,
  .multicombine = TRUE
) %dopar% {
  run_rf_optimized(segments_data[[seg_name]], "lag1_capacity", predictors_full, seg_name)
}

names(rf_results) <- names(segments_data)
rf_results <- rf_results[!sapply(rf_results, is.null)]

# Stop parallel processing
stopCluster(cl)
registerDoSEQ()

# Display variable importance results
cat("\n=== RANDOM FOREST VARIABLE IMPORTANCE ===\n")
for (result in rf_results) {
  cat("\nSegment:", result$segment, "\n")
  cat("Variables explaining 90% variance:", result$n_vars_90, "\n")
  cat("Top variables:\n")
  print(head(result$importance_data, 10))
}

# Save RF results
saveRDS(rf_results, "rf_variable_importance.rds")

# ============================================================================
# DEFINE PREDICTOR SETS (FROM RF RESULTS)
# ============================================================================

# Based on RF variable importance analysis (see Appendix E)

predictors_All_Years <- c(
  "adaptation_law_stock",
  "imports_pGDP",
  "max_populism", 
  "ROL_perc",
  "percent_gdp_services",
  "auton",
  "disaster_count",
  "ln_gdp",
  "gdpgrowth_per",
  "adaptationaid_recipient"
)

predictors__2005 <- c(
  "adaptation_law_stock",
  "imports_pGDP",
  "max_populism",
  "auton",
  "ROL_perc",
  "percent_gdp_services",
  "ln_gdp"
)

predictors_2007_2015 <- c(
  "adaptation_law_stock",
  "adaptation_law_stock:adaptationaid_recipient",
  "ln_gdp",
  "max_populism",
  "percent_gdp_services", 
  "imports_pGDP",
  "adaptationaid_recipient",
  "auton"
)

predictors_2016_ <- c(
  "adaptation_plans_stock",
  "adaptation_plans_stock:max_populism",
  "imports_pGDP",
  "ROL_perc",
  "auton",
  "ln_gdp",
  "max_populism"
)

# ============================================================================
# MAIN REGRESSION MODELS
# ============================================================================

# Model function
run_model <- function(data, predictors, dependent_var) {
  formula_str <- paste(dependent_var, "~", paste(predictors, collapse = " + "))
  model_formula <- as.formula(formula_str)
  
  # Time fixed effects model
  model_fe <- plm(
    formula = model_formula,
    data = data,
    index = c("iso3c", "year"),
    model = "within",
    effect = "time",
    vcov = "HC1"
  )
  
  # Cluster-robust standard errors
  cov_fe <- vcovHC(model_fe, method = "arellano", type = "HC1", cluster = "group")
  coef_results <- coeftest(model_fe, vcov = cov_fe)
  
  return(list(
    model = model_fe,
    coef_test = coef_results,
    robust_vcov = cov_fe
  ))
}

# Define datasets and predictors
datasets <- list(
  "pdata" = list(data = pdata, predictors = predictors_All_Years),
  "pdata_seg0" = list(data = pdata_seg0, predictors = predictors__2005),
  "pdata_seg3" = list(data = pdata_seg3, predictors = predictors_2007_2015),
  "pdata_seg4" = list(data = pdata_seg4, predictors = predictors_2016_)
)

# Dependent variables by dataset
dependent_vars_by_dataset <- list(
  "pdata" = c("lag1_capacity", "lag7_capacity"),
  "pdata_seg0" = c("lag1_capacity", "lag7_capacity"),
  "pdata_seg3" = c("lag1_capacity", "lag7_capacity"),
  "pdata_seg4" = c("lag1_capacity", "lag5_capacity")
)

# Run models
results <- list()

cat("\n=== RUNNING MAIN REGRESSION MODELS ===\n")

for (dataset_name in names(datasets)) {
  cat("\n", paste(rep("=", 60), collapse=""), "\n")
  cat("DATASET:", dataset_name, "\n")
  cat(paste(rep("=", 60), collapse=""), "\n")
  
  dataset_info <- datasets[[dataset_name]]
  data <- dataset_info$data
  predictors <- dataset_info$predictors
  dependent_vars <- dependent_vars_by_dataset[[dataset_name]]
  
  results[[dataset_name]] <- list()
  
  for (dep_var in dependent_vars) {
    cat("\nDependent Variable:", dep_var, "\n")
    
    tryCatch({
      model_results <- run_model(data, predictors, dep_var)
      results[[dataset_name]][[dep_var]] <- model_results
      
      cat("\nModel Summary:\n")
      print(summary(model_results$model))
      
      cat("\nCluster-Robust Results:\n")
      print(model_results$coef_test)
      
    }, error = function(e) {
      cat("Error:", e$message, "\n")
      results[[dataset_name]][[dep_var]] <- NULL
    })
  }
}

# Save model results
saveRDS(results, "model_results.rds")

cat("\n=== MODELS COMPLETE ===\n")
cat("Results saved to model_results.rds\n")
